Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.

We now have the following data sets:

1 Kernel Density Estimates of the marginals

df1 <- melt(as.matrix(fs))
names(df1) <- c("ind","channel","value")
df1$type <- factor(rep(ffchannel,each=dim(fs)[1]),levels=levels(ffchannel))

lvo <- c(1:5,7:10,19,22,11:16,6,17,18,20,21,23,24)
levels(df1$channel)<-levels(df1$channel)[lvo]

ts <- 22

gg1 <- ggplot(df1, aes(x=value)) + 
    scale_color_manual(values=ccol[lvo]) +
    scale_fill_manual(values=ccol[lvo]) +
    geom_histogram(aes(y=..density..,group=channel,colour=channel),bins=100) +
    geom_density(aes(group=channel, color=channel),size=1.5) +
    facet_wrap( ~ channel, scale='free', ncol=6) +
    theme(plot.title=element_text(size=ts),
          axis.title.x=element_text(size=ts),
          axis.title.y=element_text(size=ts),
          legend.title=element_text(size=ts),
          legend.text=element_text(size=ts-2),
          axis.text=element_text(size=ts-2),
          strip.text=element_text(size=ts), 
          legend.position='none')+
    ggtitle("Kernel Density Estimates of `fs` data.")

print(gg1)
Figure 1: Kernel density estimates for each channel, on fs data.

2 Level 0

dat <- fs

2.1 Heat maps (Lv 0):

## Formatting data for heatmap
aggp <- apply(dat, 2, mean)
aggp <- t(cbind(aggp, aggp))

The following are heatmaps generated from clustering via K-means++ (at level 1)

heatmap.2(as.matrix(aggp),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.") 
#  [1] "#197300"    "#197300"    "#197300"    "#197300"    "#197300"   
#  [6] "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"   
# [11] "#5ed155"    "#660000"    "#660000"    "#660000"    "#cc0000"   
# [16] "#cc0000"    "#cc0000"    "#ff9933"    "#ff9933"    "mediumblue"
# [21] "mediumblue" "mediumblue" "gold"       "gold"
Figure 2: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.

Percentage of data within cluster is presented on the right side of the heatmap.

2.2 Jittered scatter plot: Lv 0

ggJdat <- stack(dat)
ggD <- data.table(ggJdat)

set.seed(1024)
s2 <- sample(dim(ggD)[1], 5e4)
tmp <- ggD[s2,]
tmp$ind <- as.factor(tmp$ind)
levels(tmp$ind) <- names(feat)[seq(1,144,by=6)][ford]

ggJ0 <- 
  ggplot(data = tmp, aes(x = ind, y = values)) +
  geom_point(alpha=0.75) + 
  geom_jitter(width = 1) + 
  theme(axis.title.x = element_blank()) + 
  theme(axis.text.x = element_text(color = ccol[ford], 
                                   angle=45,
                                   vjust = 0.5))
print(ggJ0)
Figure 3: Scatter Plot Level 0

The above scatter plot is a random sample of the data points.

2.3 Correlations: Lv 0

cmatfs <- cor(fs)
corrplot(cmatfs,method="color",tl.col=ccol[ford], tl.cex=1)
Figure 4: Correlation on untransformed F0 data, reordered by synapse type.

3 Level 1: K-means++ for \(K=2\).

We run a Hierachical K-means++ for \(K=2\) on the fs data with 4 levels.

set.seed(2^13)
L <- bhkmpp(dat,blevels=4)

3.1 Heat maps (Lv 1):

## Formatting data for heatmap
aggp <- aggregate(dat,by=list(lab=L[[1]]),FUN=mean)
aggp <- as.matrix(aggp[,-1])
rownames(aggp) <- clusterFraction(L[[1]])

The following are heatmaps generated from clustering via K-means++ (at level 1)

heatmap.2(as.matrix(aggp),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.") 
#  [1] "#197300"    "#197300"    "#197300"    "#197300"    "#197300"   
#  [6] "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"   
# [11] "#5ed155"    "#660000"    "#660000"    "#660000"    "#cc0000"   
# [16] "#cc0000"    "#cc0000"    "#ff9933"    "#ff9933"    "mediumblue"
# [21] "mediumblue" "mediumblue" "gold"       "gold"
Figure 5: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.

Percentage of data within cluster is presented on the right side of the heatmap.

3.2 Jittered scatter plot: Lv 1

ggJdat <- stack(dat)
ggD <- data.table(ggJdat)
ggD$label <- L[[1]]

ggCol <- brewer.pal(4,"Set1")[order(table(L[[1]]))]

tmp <- ggD[s2, ]
tmp$label <- as.factor(tmp$lab)
tmp$ind <- as.factor(tmp$ind)
levels(tmp$ind) <- names(feat)[seq(1,144,by=6)][ford]

cf1 <- data.frame(cf = clusterFraction(L[[1]]))

ggJ1 <- 
  ggplot(data = tmp, aes(x = ind, y = values, 
                         color = label)) +
  scale_color_manual(values=ggCol) + 
  geom_point(alpha=0.25, position=position_jitterdodge()) + 
  geom_boxplot(alpha =0.35, outlier.color = 'NA') + 
  annotate("text", x = levels(tmp$ind)[c(2,20)], y = max(tmp$values)+25, 
           label= cf1[1:2,]) + 
  theme(axis.title.x = element_blank()) + 
  theme(axis.text.x = element_text(color = ccol[ford], 
                                   angle=45,
                                   vjust = 0.5))
print(ggJ1)
Figure 6: Scatter Plot Level 1

3.3 Within cluster correlations (Lv 1)

corkp1 <- cor(dat[L[[1]] == 1,])
corkp2 <- cor(dat[L[[1]] == 2,])
difCor12 <- sqrt((corkp1 - corkp2)^2)

layout(matrix(c(1,2,3,3), 4, 2, byrow=TRUE))
corrplot(corkp1,method="color",tl.col=ccol[ford], tl.cex=0.8)
title(sub = "cluster 1")
corrplot(corkp2,method="color",tl.col=ccol[ford], tl.cex=0.8)
title(sub = "cluster 2")
corrplot(difCor12,is.corr=FALSE,method="color",
         tl.col=ccol[ford], tl.cex=0.8, 
         col=colorRampPalette(c("white","white","darkorange"))(50))
title(sub = "cluster difference")
Figure 7: Within cluster correlations, clock-wise from top left, Cluster 1, Cluster 2, l2 distance between C1 and C2

Notice that the non-synaptic markers change very little between clusters. Also note that the correlations between (gad, VGAT, PV, Gephyr) and VGlut1 at both times change significantly between clusters.

3.4 Clusters and Spatial Location (Lv 1)

Using the location data and the results of K-means++ we show a 3d scatter plot colored accoding to cluster.

set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)

locs1 <- loc[s1,]
locs1$cluster <- L[[1]][s1]

plot3d(locs1$V1,locs1$V2,locs1$V3,
       col=brewer.pal(4,"Set1")[order(table(L[[1]]))][locs1$cluster],
       alpha=0.75,
       xlab='x', 
       ylab='y', 
       zlab='z')

subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocations", height=720, width=720)

4 Level 2: K-means++ for \(K=2\).

4.1 Within cluster correlations (Lv 2)

corLV2 <- lapply(c(1:4),function(x){cor(dat[L[[2]] == x,])})

difCor1112 <- sqrt((corLV2[[2]] - corLV2[[1]])^2)
difCor2122 <- sqrt((corLV2[[4]] - corLV2[[3]])^2)

layout(matrix(c(1,2,3,3,4,5,6,6), 4, 2, byrow=TRUE))
corrplot(corLV2[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title("Cluster 1")
corrplot(corLV2[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title("Cluster 2")
corrplot(difCor1112, method="color", tl.col=ccol[ford], 
         tl.cex=0.8,
         mar = c(0,0,1,0),
         cl.lim = c(0,max(difCor1112,difCor2122)),
         col=colorRampPalette(c("white", 
                                "white",
                                "darkorange"))(100))
title("Difference(1,2)")
corrplot(corLV2[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title("Cluster 3")
corrplot(corLV2[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title("Cluster 4")
corrplot(difCor2122, method="color", tl.col=ccol[ford], 
         tl.cex=0.8,
         mar=c(0,0,1,0),
         cl.lim = c(0,max(difCor1112,difCor2122)),
         col=colorRampPalette(c("white", 
                                "white",
                                "darkorange"))(100))
title("Difference(3,4)")
Figure 8: Within cluster correlations for level 2. (c11, c12, c21, c22)

4.2 Heat maps (Lv 2):

## Formatting data for heatmap
aggp2 <- aggregate(dat,by=list(lab=L[[2]]),FUN=function(x){mean(x)}) 
aggp2 <- as.matrix(aggp2[,-1])
rownames(aggp2) <- clusterFraction(L[[2]])

The following are heatmaps generated from clustering via K-means++

heatmap.2(as.matrix(aggp2),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.") 
#  [1] "#197300"    "#197300"    "#197300"    "#197300"    "#197300"   
#  [6] "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"   
# [11] "#5ed155"    "#660000"    "#660000"    "#660000"    "#cc0000"   
# [16] "#cc0000"    "#cc0000"    "#ff9933"    "#ff9933"    "mediumblue"
# [21] "mediumblue" "mediumblue" "gold"       "gold"
Figure 9: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.

Percentage of data within cluster is presented on the right side of the heatmap.

4.3 Jittered scatter plot: Lv 2

ggJdat <- stack(dat)
ggD <- data.table(ggJdat)
ggD$label <- L[[2]]

ggCol <- brewer.pal(8,"Set1")[order(table(L[[2]]))]

tmp <- ggD[s2, ]
tmp$label <- as.factor(tmp$lab)
tmp$ind <- as.factor(tmp$ind)
levels(tmp$ind) <- names(feat)[seq(1,144,by=6)][ford]

cf2 <- data.frame(cf = clusterFraction(L[[2]]))
ggJ2 <- 
  ggplot(data = tmp, aes(x = ind, y = values, 
                         color = label)) +
  scale_color_manual(values=ggCol) + 
  geom_point(alpha=0.25, position=position_jitterdodge()) + 
  geom_boxplot(alpha =0.35, outlier.color = 'NA') + 
  annotate("text", x = levels(tmp$ind)[c(2,8,14,20)], y = max(tmp$values)+25, 
           label= cf2[1:4,]) + 
  theme(axis.title.x = element_blank()) + 
  theme(axis.text.x = element_text(color = ccol[ford], 
                                   angle=45,
                                   vjust = 0.5))
print(ggJ2)
Figure 10: Scatter Plot Level 2

The fraction of data points within each cluster are given at the top of the plot window. ## Kernel Density Estimates of the marginals | cluster (Lv 2)

Here we look at the kernel density estimates within each cluster to compare.

4.4 Clusters and Spatial Location (Lv 2)

Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.

set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)

locs2 <- loc[s1,]
locs2$cluster <- L[[2]][s1]

YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
col.pal <- colorRampPalette(YlOrBr)

plot3d(locs2$V1,locs2$V2,locs2$V3,
       #col=colorpanel(8,"brown","blue")[order(table(L[[2]]))][locs2$cluster],
       col=col.pal(8)[-seq(1,8,2)][order(table(L[[2]]))][locs2$cluster],
       alpha=0.75,
       xlab='x', 
       ylab='y', 
       zlab='z'
       )

subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV2", height=720, width=720)

5 Level 3: K-means++ for \(K=2\).

5.1 Within cluster correlations (Lv 3)

corLV3 <- lapply(c(1:8),function(x){cor(dat[L[[3]] == x,])})

difCor1 <- sqrt((corLV3[[2]] - corLV3[[1]])^2)
difCor2 <- sqrt((corLV3[[4]] - corLV3[[3]])^2)
difCor3 <- sqrt((corLV3[[6]] - corLV3[[5]])^2)
difCor4 <- sqrt((corLV3[[8]] - corLV3[[7]])^2)
m <- max(difCor1, difCor2, difCor3, difCor4)

layout(matrix(c(1, 2, 3, 3,
                4, 5, 6, 6, 
                7, 8, 9, 9,
                10, 11, 12, 12), 8,2, byrow=TRUE))

corrplot(corLV3[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
         mar=c(0,0,1,0))
title('Cluster 1')
corrplot(corLV3[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
         mar=c(0,0,1,0))
title('Cluster 2')
corrplot(difCor1,method="color",tl.col=ccol[ford], tl.cex=0.8,
         cl.lim=c(0,m), 
         mar=c(0,0,1,0),
         col=colorRampPalette(c("white","white","darkorange"))(50))
title('Difference(1,2)')
corrplot(corLV3[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 3')
corrplot(corLV3[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 4')
corrplot(difCor2,method="color",tl.col=ccol[ford], tl.cex=0.8, 
         cl.lim= c(0,m),
         mar=c(0,0,1,0),
         col=colorRampPalette(c("white",
                                "white",
                                "darkorange"))(50))
title('Difference(3,4)')
corrplot(corLV3[[5]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 5')
corrplot(corLV3[[6]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 6')
corrplot(difCor3,method="color",tl.col=ccol[ford], tl.cex=0.8,
         cl.lim= c(0,m),
         mar=c(0,0,1,0),
         col=colorRampPalette(c("white",
                                "white",
                                "darkorange"))(50))
title('Difference(5,6)')
corrplot(corLV3[[7]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 7')
corrplot(corLV3[[8]],method="color",tl.col=ccol[ford], tl.cex=0.8, 
         mar=c(0,0,1,0))
title('Cluster 8')
corrplot(difCor4,method="color",tl.col=ccol[ford], tl.cex=0.8,
         cl.lim= c(0,m),
         mar=c(0,0,1,0),
         col=colorRampPalette(c("white",
                                "white",
                                "darkorange"))(50))
title('Difference(7,8)')
Figure 11: Within cluster correlations for level 3. (c111, c112, c121, c122, c211, c212, c221, c222)

5.2 Heat maps (Lv 3):

## Formatting data for heatmap
aggp3 <- aggregate(dat,by=list(lab=L[[3]]),FUN=function(x){mean(x)})
aggp3 <- as.matrix(aggp3[,-1])
rownames(aggp3) <- clusterFraction(L[[3]])

The following are heatmaps generated from clustering via K-means++

heatmap.2(as.matrix(aggp3),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.") 
#  [1] "#197300"    "#197300"    "#197300"    "#197300"    "#197300"   
#  [6] "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"    "#5ed155"   
# [11] "#5ed155"    "#660000"    "#660000"    "#660000"    "#cc0000"   
# [16] "#cc0000"    "#cc0000"    "#ff9933"    "#ff9933"    "mediumblue"
# [21] "mediumblue" "mediumblue" "gold"       "gold"
Figure 12: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.

Percentage of data within cluster is presented on the right side of the heatmap.

5.3 Jittered scatter plot: Lv 3

ggJdat <- stack(dat)
ggD <- data.table(ggJdat)
ggD$label <- L[[3]]

ggCol <- brewer.pal(8,"Set1")[order(table(L[[3]]))]

tmp <- ggD[s2, ]
tmp$label <- as.factor(tmp$lab)
tmp$ind <- as.factor(tmp$ind)
levels(tmp$ind) <- names(feat)[seq(1,144,by=6)][ford]

ggJ3 <- 
  ggplot(data = tmp, aes(x = ind, y = values, 
                         color = label)) +
  scale_color_manual(values=ggCol) + 
  geom_boxplot(alpha =0.35, outlier.color = 'NA') + 
  geom_point(alpha=0.25, position=position_jitterdodge()) + 
  #geom_jitter(width=2) + 
  theme(axis.title.x = element_blank()) + 
  theme(axis.text.x = element_text(color = ccol[ford], 
                                   angle=45,
                                   vjust = 0.5))
print(ggJ3)
Figure 13: Scatter Plot Level 3

5.4 Clusters and Spatial Location (Lv 3)

Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.

set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)

locs3 <- loc[s1,]
locs3$cluster <- L[[3]][s1]

plot3d(locs3$V1,locs3$V2,locs3$V3,
       col=col.pal(16)[-seq(1,8,2)][order(table(L[[3]]))][locs3$cluster],
       alpha=0.65,
       xlab='x', 
       ylab='y', 
       zlab='z'
       )

subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV3", height=720, width=720)